library(tidyverse)
library(wesanderson)
library(MetBrewer)
library(sp)
library(sf)
library(classInt)
library(raster)
library(fixest)
library(osmdata)


dhs <- read_rds("data/dhs_cluster_level_wealth.rds") #ave rwi normalized by survey for all dhs clusters in countries with wealth data

loc <- dhs %>% ungroup() %>%  dplyr::select(cluster_id, lon, lat) %>% distinct()
    
    # read in (publicly available) pm data and process for fig

    # grid <- raster(raster("data/annual_001/V5GL02.HybridPM25.Global.199801-199812.nc"))
    # grid[] <- 1:ncell(grid)
    # 
    # 
    # 
    # loc$cell <- cellFromXY(grid, as.matrix(loc[,c("lon","lat")]))
    # loc$row <- rowFromCell(grid, loc$cell)
    # loc$col <- colFromCell(grid, loc$cell)
    # 
    # 
    # fls <- list.files("1_input/data/vandonk/annual_001/"); fls <- fls[18:22]
    # datmat <- matrix(nrow = nrow(loc), ncol = length(fls))
    # 
    # rdat <- brick(x = grid, nl = length(fls))
    # 
    #       for (i in 1:length(fls)){
    #       
    #         rdat[[i]] <- (raster(paste("1_input/data/vandonk/annual_001/", fls[i], sep = "")))
    #       
    #       }
    #   
    #   yy <- rdat[loc$cell]
    #   
    #   write_rds(yy, file = "data/annual_dhs_vandonk_pm_raw.rds", compress = "gz")
    #   
    
# 
# 
#   output <- data.frame(cluster_id = loc$cluster_id, lon = loc$lon, lat = loc$lat, pm_mean = as.numeric(apply(yy,1,function(x){mean(x, na.rm = T)})))
#   output$pm_median = apply(yy,1,function(x){median(x, na.rm = T)})
#   output$pm_max = apply(yy,1,function(x){max(x, na.rm = T)})
#   output$pm_max[is.infinite(output$pm_max)]<-NA
#   output$pm_mean = round(output$pm_mean, 2)
#   
#   
#   
  #now need to repeat code for anthro pm data
  
  
  # 
  # grid <- raster(raster("1_input/data/vandonk/annual_no_ss/ACAG_PM25_noDUSTnoSEASALT_GWR_V4GL03_201401_201412_0p01.nc"))
  # grid <- flip(flip(t(grid), direction = "x"), direction = "y")
  # grid[] <- 1:ncell(grid)
  # 
  # 
  # 
  # loc$cell <- cellFromXY(grid, as.matrix(loc[,c("lon","lat")]))
  # loc$row <- rowFromCell(grid, loc$cell)
  # loc$col <- colFromCell(grid, loc$cell)
  # 
  # 
  # fls <- list.files("1_input/data/vandonk/annual_no_ss/")[16:20]
  # 
  # 
  # rdat <- raster::brick(x = grid, nl = length(fls))
  # 
  # for (i in 1:length(fls)){
  #   
  #   rdat[[i]] <- flip(flip(t(raster(paste("1_input/data/vandonk/annual_no_ss/", fls[i], sep = ""))), direction = "x"), direction = "y") 
  #   
  # }
  # 
  # 
  # zz <- rdat[loc$cell]
  # 
  # write_rds(zz, file = "data/annual_dhs_vandonk_pm_anthro_raw.rds", compress = "gz")
#   
#   
# #  output <- data.frame(cluster_id = loc$cluster_id, lon = loc$lon, lat = loc$lat, pm_mean = as.numeric(apply(yy,1,function(x){mean(x, na.rm = T)})))
#   output$pm_anthro_mean = as.numeric(apply(zz,1,function(x){mean(x, na.rm = T)}))
#   output$pm_anthro_median = apply(zz,1,function(x){median(x, na.rm = T)})
#   output$pm_anthro_max = apply(zz,1,function(x){max(x, na.rm = T)})
#   output$pm_anthro_max[is.infinite(output$pm_max)]<-NA
#   
#   
  
  
  
  
  
  
  
  
  # write_rds(output, file = "1_input/data/vandonk_rds/annual_dhs_vandonk_pm_clean.rds", compress = "gz")
  
  

  ########################################################################################################################
  
  
  dhs <- read_rds("data/dhs_cluster_level_wealth.rds") %>% drop_na(wealth)
  pm <- read_rds("data/annual_dhs_vandonk_pm_clean.rds")

  dhs <- dhs %>% left_join(pm)
  
  
  #regression
  
  dhs$wealth_n <- dhs$wealth/10000
  
  # write_csv(dhs, file = "~/Dropbox (Personal)/Global pollution (Selective Sync Conflict)/2_analysis/data/dhs_cluster_wealth_pm_anthro.csv")
  
          
          mod <- feols(pm_mean ~ wealth_n | svy_id, data = dhs)
          
          cty_list <- sort(unique(dhs$country))
          cty_mod <- data.frame(country = cty_list, est = NA, se = NA)
          
          for(i in 1:length(cty_list)){
            mod1 <- feols(pm_mean ~ wealth | svy_id, data = dhs[dhs$country == cty_list[i],])
            cty_mod[i,"est"] <- coef(mod1)[1]
            cty_mod[i,"se"] <- se(mod1)[1]
            
            
          }
          
          
          
          cty_mod <- cty_mod %>% arrange(est)
          
        
          
          svy_list <- sort(unique(dhs$svy_id))
          dhs$wealth_p <- NA
          
          for(i in 1:length(svy_list)){
            dhs$wealth_p[dhs$svy_id == svy_list[i]] <-  statar::xtile(dhs$wealth[dhs$svy_id == svy_list[i]], n = 100)
                                      }
          
          
          mod <- feols(pm_mean ~ wealth_p | svy_id, data = dhs)
          
          
          dhs <- dhs %>% arrange(svy_id, wealth, wealth_p)
          

  
  
pdf(file = "figures/figure_SI7.pdf", width = 10, height = 4)
  plot(1:nrow(cty_mod), cty_mod$est, col = NA, axes = F, xlab = "", ylab = "", ylim = c(-1.3e-04, 1e-04))
    abline(h=0, col = 'red')
    segments(x0 = 1:nrow(cty_mod), y0=cty_mod$est + qnorm(0.025)*cty_mod$se, y1=cty_mod$est + qnorm(0.975)*cty_mod$se, col= 'navy',lwd=2 )
    points(1:nrow(cty_mod),cty_mod$est, pch = 21, cex=1.2, col = 'navy', bg = 'white')
  axis(2, tick = T, las=2, at = seq(-1e-04, 1e-04, 5e-05), labels = seq(-1e-04, 1e-04, 5e-05)*1e5)
  
  text(x = 1:nrow(cty_mod), y = cty_mod$est + qnorm(0.025)*cty_mod$se - 0.1e-04, labels = cty_mod$country, srt=90, adj= 1, cex=0.5)
  
dev.off()



